library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read the tab-separated decomposition table from the working directory
df <- readr::read_tsv(file = "decomposition_proc.tsv")
## Rows: 394 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (14): vacc_name, vacc_vocab, p1, p2, p3, p4, p5, p6, m1, m2, m3, m4, m5, m6
## dbl  (1): vacc_id
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Reshape the wide p1..pN / m1..mN columns into one row per (vacc_id, num),
# where `num` is the trailing digit of the column name, `disease` holds the
# p* value and `mechanism` holds the m* value for that slot.
df2 <- df %>%
  select(vacc_id, matches("^p|^m")) %>%
  pivot_longer(cols = -vacc_id, names_to = "key", values_to = "value") %>%
  # split e.g. "p3" into key = "p" and num = "3" (break before the last char)
  tidyr::separate("key", into = c("key", "num"), sep = -1) %>%
  pivot_wider(names_from = "key", values_from = "value") %>%
  rename(disease = p, mechanism = m) %>%
  # remove concepts without a disease; this also removes rows where both
  # disease and mechanism are missing, so no separate filter is needed
  filter(!is.na(disease)) %>%
  arrange(vacc_id, num)

# df2 <- df2 %>% 
  # filter(disease %in% c("covid-19", "adenovirus"))

# df2 <- df2 %>% 
  # mutate(mechanism = NA_character_)
# single mechanism concepts: every distinct (disease, mechanism) pair that
# actually has a mechanism
mechanism_single <- df2 %>%
  select(disease, mechanism) %>%
  drop_na(mechanism) %>%
  distinct()
  
# single disease concepts: every distinct disease, with the mechanism blanked
disease_single <- df2 %>%
  drop_na(disease) %>%
  select(disease) %>%
  distinct() %>%
  mutate(mechanism = NA_character_)

# combination mechanism concepts: for vaccines with more than one
# mechanism row, join their diseases and mechanisms with "|"
mechanism_combos <- df2 %>%
  drop_na(mechanism) %>%
  group_by(vacc_id) %>%
  summarise(
    disease = str_c(disease, collapse = "|"),
    mechanism = str_c(mechanism, collapse = "|"),
    n_rows = n(),
    .groups = "drop"
  ) %>%
  filter(n_rows > 1) %>%
  select(disease, mechanism) %>%
  distinct()

# combination disease concepts: for vaccines covering more than one distinct
# disease, join the diseases with "|" and leave the mechanism blank
disease_combos <- df2 %>%
  distinct(vacc_id, disease) %>%
  group_by(vacc_id) %>%
  summarise(
    disease = str_c(disease, collapse = "|"),
    n_diseases = n(),
    .groups = "drop"
  ) %>%
  filter(n_diseases > 1) %>%
  select(disease) %>%
  distinct() %>%
  mutate(mechanism = NA_character_)

# combine all expanded concepts into a single dataframe, de-duplicate them,
# and number the sorted rows with a leading `id` column
expanded <- list(
  select(df2, disease, mechanism),
  mechanism_single,
  mechanism_combos,
  disease_single,
  disease_combos
) %>%
  bind_rows() %>%
  distinct() %>%
  arrange(disease, mechanism) %>%
  mutate(id = row_number(), .before = 1)
# reformat the concepts into a formal context: one row per concept id, one
# column per attribute, "X" where the concept has the attribute and "" elsewhere
fc <- expanded %>%
  pivot_longer(cols = c(disease, mechanism)) %>%
  # missing diseases/mechanisms contribute no attribute
  filter(!is.na(value)) %>%
  # a "|"-delimited combination becomes one row per component
  mutate(value = str_split(value, "\\|")) %>%
  unnest(cols = "value") %>%
  mutate(
    value = str_trim(value),
    # whitespace and hyphens become underscores; parentheses and commas go
    value = str_replace_all(value, "\\s|-", "_"),
    value = str_remove_all(value, "\\(|\\)|,"),
    # prefix with "D_" or "M_" taken from the source column name
    value = paste0(toupper(str_sub(name, 1, 1)), "_", value),
    x = "X"
  ) %>%
  distinct() %>%
  select(-name) %>%
  pivot_wider(names_from = "value", values_from = "x", values_fill = "")

write_csv(fc, "formal_context.csv")
def boiler(csv_filename, output_path):
  """Build the concept lattice of a formal context and export it as CSV.

  Reads the formal-context CSV, computes its concept lattice with the
  `concepts` package, and writes two files:

    <output_path>concept.csv               columns: id, concept_name
    <output_path>concept_relationship.csv  columns: id_1, relationship, id_2

  Concept ids are the 0-based positions of the concepts in lattice iteration
  order; no offset is applied. Each relationship row links a concept (id_1)
  to one of its upper neighbors (id_2) with the label "Is a".

  Args:
    csv_filename: Path of the formal-context CSV, resolved against the
      current working directory.
    output_path: Filename prefix for the two output files, resolved against
      the current working directory.
  """
  from concepts import Context
  import pandas as pd
  import os

  print("Using working directory: " + os.getcwd())
  output_path = os.path.join(os.getcwd(), output_path)

  # create the context object from the csv file
  context = Context.fromfile(os.path.join(os.getcwd(), csv_filename), frmat='csv')

  # use the attributes (intent) to define the concept name;
  # a concept with an empty intent gets an empty name
  def get_concept_name(concept):
    return "; ".join(concept.intent)

  concept_list = list(context.lattice)
  concept_names = [get_concept_name(concept) for concept in concept_list]

  print(len(concept_list))

  # (child index, parent index) pairs, one per upper neighbor of each concept
  maps_to_list = [
    (idx, concept_list.index(parent))
    for idx, concept in enumerate(concept_list)
    for parent in concept.upper_neighbors
  ]

  # create the concept table; ids are 0-based list positions
  concept_df = pd.DataFrame({"id": range(len(concept_names)),
                             "concept_name": concept_names})

  # create the 'Is a' relationship table using the same 0-based ids
  concept_relationship_df = pd.DataFrame({"id_1": [child for child, _ in maps_to_list],
                                          "relationship": "Is a",
                                          "id_2": [parent for _, parent in maps_to_list]})

  concept_df.to_csv(output_path + "concept.csv", index=False)
  concept_relationship_df.to_csv(output_path + "concept_relationship.csv", index=False)

# Run boiler on the formal context; outputs are prefixed "new_vaccine_vocab_"
boiler(csv_filename='formal_context.csv', output_path='new_vaccine_vocab_')
## Using working directory: /Users/adamblack/projects/FCA_boiler/cvx_hcpcs_icdproc_experiment
## 108
# Load the exported concept table; any concept whose name is missing is
# labelled "Vaccine"
concept <- read_csv("new_vaccine_vocab_concept.csv") %>%
  mutate(concept_name = replace(concept_name, is.na(concept_name), "Vaccine"))
## Rows: 108 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): concept_name
## dbl (1): id
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the exported relationships, keeping only rows whose two endpoints are
# both present in the concept table
cr <- read_csv("new_vaccine_vocab_concept_relationship.csv") %>%
  filter(id_1 %in% concept$id & id_2 %in% concept$id)
## Rows: 214 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): relationship
## dbl (2): id_1, id_2
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a preview of the concept table
concept
## # A tibble: 108 × 2
##       id concept_name                                                           
##    <dbl> <chr>                                                                  
##  1     0 D_adenovirus; M_adenovirus_live; D_anthrax; D_cholera; D_covid_19; M_c…
##  2     1 D_adenovirus; M_adenovirus_live                                        
##  3     2 D_anthrax                                                              
##  4     3 D_cholera                                                              
##  5     4 D_covid_19; M_covid_19_mRNA                                            
##  6     5 D_covid_19; M_covid_19_vector                                          
##  7     6 D_dengue                                                               
##  8     7 D_diphtheria; M_diphtheria_antitoxin                                   
##  9     8 D_diphtheria; M_diphtheria_toxoid; D_tetanus; M_tetanus_toxoid; D_pert…
## 10     9 D_diphtheria; M_diphtheria_toxoid; D_tetanus; M_tetanus_toxoid; D_pert…
## # … with 98 more rows
# Show each relationship with the concept names in place of the numeric ids
cr %>%
  left_join(
    rename(concept, id_1 = id, concept_name_1 = concept_name),
    by = "id_1"
  ) %>%
  left_join(
    rename(concept, id_2 = id, concept_name_2 = concept_name),
    by = "id_2"
  ) %>%
  select(concept_name_1, relationship, concept_name_2)
## # A tibble: 214 × 3
##    concept_name_1                   relationship concept_name_2                 
##    <chr>                            <chr>        <chr>                          
##  1 D_adenovirus; M_adenovirus_live… Is a         D_adenovirus; M_adenovirus_live
##  2 D_adenovirus; M_adenovirus_live… Is a         D_anthrax                      
##  3 D_adenovirus; M_adenovirus_live… Is a         D_cholera                      
##  4 D_adenovirus; M_adenovirus_live… Is a         D_covid_19; M_covid_19_mRNA    
##  5 D_adenovirus; M_adenovirus_live… Is a         D_covid_19; M_covid_19_vector  
##  6 D_adenovirus; M_adenovirus_live… Is a         D_dengue                       
##  7 D_adenovirus; M_adenovirus_live… Is a         D_diphtheria; M_diphtheria_ant…
##  8 D_adenovirus; M_adenovirus_live… Is a         D_diphtheria; M_diphtheria_tox…
##  9 D_adenovirus; M_adenovirus_live… Is a         D_diphtheria; M_diphtheria_tox…
## 10 D_adenovirus; M_adenovirus_live… Is a         D_diphtheria; D_tetanus; D_per…
## # … with 204 more rows

Visualize Boiler Output

# Draw a concept graph with the "fr" layout: arrowed edges, a point per node
# and repelled `display_name` labels.
#
# graph        The graph to draw (piped into ggraph()).
# name_pattern Optional regular expression; when given, only nodes whose
#              `name` matches it are kept. NULL (default) draws every node.
#
# Returns the ggraph plot object.
plot_concept_graph <- function(graph, name_pattern = NULL) {
  if (!is.null(name_pattern)) {
    graph <- graph %>%
      activate(nodes) %>%
      filter(str_detect(name, name_pattern))
  }

  graph %>%
    ggraph("fr") +
    geom_edge_link(
      arrow = arrow(
        angle = 20,
        length = unit(0.15, "inches"),
        ends = "last",
        type = "open"
      )
    ) +
    geom_node_point() +
    coord_fixed() +
    ggraph::geom_node_text(aes(label = display_name), repel = TRUE, force = 100)
}

# Full graph
plt <- plot_concept_graph(g)
# plt
ggsave("vaccines.png", plt, width = 30, height = 30)

# Only concepts whose name mentions tetanus
plt <- plot_concept_graph(g, name_pattern = "D_tetanus")
ggsave("tetanus.png", plt, width = 30, height = 30)

# Only concepts whose name mentions measles
plt <- plot_concept_graph(g, name_pattern = "D_measles")
ggsave("measles.png", plt, width = 10, height = 10)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(igraph)
## 
## Attaching package: 'igraph'
## The following object is masked from 'package:plotly':
## 
##     groups
## The following object is masked from 'package:tidygraph':
## 
##     groups
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(igraphdata)

# NOTE(review): `karate` is loaded but not used below; it looks like a
# leftover from an example. Confirm before removing.
data(karate, package = "igraphdata")
# G <- upgrade_graph(karate)
G <- g
# L <- layout.circle(G)
# layout_nicely() is the current name of the deprecated layout.auto()
L <- layout_nicely(G)

# Node identifiers (used to index coordinates) and hover labels
node_names <- g %>%
  activate(nodes) %>%
  pull(name)

nm <- g %>%
  activate(nodes) %>%
  pull(display_name)

# Create Vertices and Edges
vs <- V(G)
# as_edgelist() is the current name of the deprecated get.edgelist();
# one row per edge, endpoints in columns V1 and V2
es <- as.data.frame(as_edgelist(G))

Nv <- length(vs)
Ne <- nrow(es)

# Create Nodes
library(plotly)

# Layout coordinates, named by node so edges can look them up
Xn <- L[, 1]
Yn <- L[, 2]

names(Xn) <- node_names
names(Yn) <- node_names

# The trace type is stated explicitly so plotly does not have to guess it
network <- plot_ly(
  x = ~Xn,
  y = ~Yn,
  type = "scatter",
  mode = "markers",
  text = nm,
  hoverinfo = "text"
)
  
# Creates Edges
# One plotly line shape per row of `es`, drawn between the coordinates that
# Xn / Yn hold for the edge's two endpoints (V1 -> V2).
# seq_len() yields an empty list when there are no edges, where `1:Ne`
# would iterate over c(1, 0) and fail.
edge_shapes <- lapply(seq_len(Ne), function(i) {
  v0 <- es[i, ]$V1
  v1 <- es[i, ]$V2

  list(
    type = "line",
    line = list(color = "#030303", width = 0.3),
    x0 = Xn[v0],
    y0 = Yn[v0],
    x1 = Xn[v1],
    y1 = Yn[v1]
  )
})
  
# Create Network
# Shared axis settings: no title, grid, tick labels or zero line
axis <- list(
  title = "",
  showgrid = FALSE,
  showticklabels = FALSE,
  zeroline = FALSE
)

# Add the edge line shapes and the bare axes to the node scatter plot
fig <- network %>%
  plotly::layout(
    title = "FCA Boiler Vaccine Graph",
    shapes = edge_shapes,
    xaxis = axis,
    yaxis = axis
  )

# Disabled sketch for drawing an arrow annotation:
# plotly::add_annotations(fig, "",
#   x=30,  # arrows' head
#   y=30,  # arrows' head
#   ax=40,  # arrows' tail
#   ay=40,  # arrows' tail
#   xref='x',
#   yref='y',
#   axref='x',
#   ayref='y',
#   text='',  # if you want only the arrow
#   showarrow=T,
#   arrowhead=3,
#   arrowsize=1,
#   arrowwidth=1,
#   arrowcolor='black')

fig
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter